{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "da309012-e70b-4a66-9531-8e7beaef401a",
   "metadata": {},
   "source": [
    "## 决策树，随机森林"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f090ac7-c652-41f7-99ff-1da6fc689615",
   "metadata": {},
   "source": [
    "决策树"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2555fa28-8a21-4d02-9588-d3f54fd37360",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "8546eb3f-ff2b-4a4a-9781-ce49518b57c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn import tree\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc\n",
    "\n",
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = \"all\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "c85bc6a0-1ad3-4a12-ab80-52a708e3457f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1000 entries, 0 to 999\n",
      "Data columns (total 21 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   checking  1000 non-null   int64 \n",
      " 1   duration  1000 non-null   int64 \n",
      " 2   history   1000 non-null   int64 \n",
      " 3   purpose   1000 non-null   object\n",
      " 4   amount    1000 non-null   int64 \n",
      " 5   savings   1000 non-null   int64 \n",
      " 6   employed  1000 non-null   int64 \n",
      " 7   installp  1000 non-null   int64 \n",
      " 8   marital   1000 non-null   int64 \n",
      " 9   coapp     1000 non-null   int64 \n",
      " 10  resident  1000 non-null   int64 \n",
      " 11  property  1000 non-null   int64 \n",
      " 12  age       1000 non-null   int64 \n",
      " 13  other     1000 non-null   int64 \n",
      " 14  housing   1000 non-null   int64 \n",
      " 15  existcr   1000 non-null   int64 \n",
      " 16  job       1000 non-null   int64 \n",
      " 17  depends   1000 non-null   int64 \n",
      " 18  telephon  1000 non-null   int64 \n",
      " 19  foreign   1000 non-null   int64 \n",
      " 20  good_bad  1000 non-null   object\n",
      "dtypes: int64(19), object(2)\n",
      "memory usage: 164.2+ KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>checking</th>\n",
       "      <th>duration</th>\n",
       "      <th>history</th>\n",
       "      <th>amount</th>\n",
       "      <th>savings</th>\n",
       "      <th>employed</th>\n",
       "      <th>installp</th>\n",
       "      <th>marital</th>\n",
       "      <th>coapp</th>\n",
       "      <th>resident</th>\n",
       "      <th>property</th>\n",
       "      <th>age</th>\n",
       "      <th>other</th>\n",
       "      <th>housing</th>\n",
       "      <th>existcr</th>\n",
       "      <th>job</th>\n",
       "      <th>depends</th>\n",
       "      <th>telephon</th>\n",
       "      <th>foreign</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.00000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.00000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.577000</td>\n",
       "      <td>20.903000</td>\n",
       "      <td>2.54500</td>\n",
       "      <td>3271.258000</td>\n",
       "      <td>2.105000</td>\n",
       "      <td>3.384000</td>\n",
       "      <td>2.973000</td>\n",
       "      <td>2.68200</td>\n",
       "      <td>1.145000</td>\n",
       "      <td>2.845000</td>\n",
       "      <td>2.358000</td>\n",
       "      <td>35.546000</td>\n",
       "      <td>2.675000</td>\n",
       "      <td>1.929000</td>\n",
       "      <td>1.407000</td>\n",
       "      <td>2.904000</td>\n",
       "      <td>1.155000</td>\n",
       "      <td>1.404000</td>\n",
       "      <td>1.037000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.257638</td>\n",
       "      <td>12.058814</td>\n",
       "      <td>1.08312</td>\n",
       "      <td>2822.736876</td>\n",
       "      <td>1.580023</td>\n",
       "      <td>1.208306</td>\n",
       "      <td>1.118715</td>\n",
       "      <td>0.70808</td>\n",
       "      <td>0.477706</td>\n",
       "      <td>1.103718</td>\n",
       "      <td>1.050209</td>\n",
       "      <td>11.375469</td>\n",
       "      <td>0.705601</td>\n",
       "      <td>0.531264</td>\n",
       "      <td>0.577654</td>\n",
       "      <td>0.653614</td>\n",
       "      <td>0.362086</td>\n",
       "      <td>0.490943</td>\n",
       "      <td>0.188856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>250.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>19.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>1365.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>27.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>2.00000</td>\n",
       "      <td>2319.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>33.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>3972.250000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>42.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>18424.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.00000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>75.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          checking     duration     history        amount      savings  \\\n",
       "count  1000.000000  1000.000000  1000.00000   1000.000000  1000.000000   \n",
       "mean      2.577000    20.903000     2.54500   3271.258000     2.105000   \n",
       "std       1.257638    12.058814     1.08312   2822.736876     1.580023   \n",
       "min       1.000000     4.000000     0.00000    250.000000     1.000000   \n",
       "25%       1.000000    12.000000     2.00000   1365.500000     1.000000   \n",
       "50%       2.000000    18.000000     2.00000   2319.500000     1.000000   \n",
       "75%       4.000000    24.000000     4.00000   3972.250000     3.000000   \n",
       "max       4.000000    72.000000     4.00000  18424.000000     5.000000   \n",
       "\n",
       "          employed     installp     marital        coapp     resident  \\\n",
       "count  1000.000000  1000.000000  1000.00000  1000.000000  1000.000000   \n",
       "mean      3.384000     2.973000     2.68200     1.145000     2.845000   \n",
       "std       1.208306     1.118715     0.70808     0.477706     1.103718   \n",
       "min       1.000000     1.000000     1.00000     1.000000     1.000000   \n",
       "25%       3.000000     2.000000     2.00000     1.000000     2.000000   \n",
       "50%       3.000000     3.000000     3.00000     1.000000     3.000000   \n",
       "75%       5.000000     4.000000     3.00000     1.000000     4.000000   \n",
       "max       5.000000     4.000000     4.00000     3.000000     4.000000   \n",
       "\n",
       "          property          age        other      housing      existcr  \\\n",
       "count  1000.000000  1000.000000  1000.000000  1000.000000  1000.000000   \n",
       "mean      2.358000    35.546000     2.675000     1.929000     1.407000   \n",
       "std       1.050209    11.375469     0.705601     0.531264     0.577654   \n",
       "min       1.000000    19.000000     1.000000     1.000000     1.000000   \n",
       "25%       1.000000    27.000000     3.000000     2.000000     1.000000   \n",
       "50%       2.000000    33.000000     3.000000     2.000000     1.000000   \n",
       "75%       3.000000    42.000000     3.000000     2.000000     2.000000   \n",
       "max       4.000000    75.000000     3.000000     3.000000     4.000000   \n",
       "\n",
       "               job      depends     telephon      foreign  \n",
       "count  1000.000000  1000.000000  1000.000000  1000.000000  \n",
       "mean      2.904000     1.155000     1.404000     1.037000  \n",
       "std       0.653614     0.362086     0.490943     0.188856  \n",
       "min       1.000000     1.000000     1.000000     1.000000  \n",
       "25%       3.000000     1.000000     1.000000     1.000000  \n",
       "50%       3.000000     1.000000     1.000000     1.000000  \n",
       "75%       3.000000     1.000000     2.000000     1.000000  \n",
       "max       4.000000     2.000000     2.000000     2.000000  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "checking    0\n",
       "duration    0\n",
       "history     0\n",
       "purpose     0\n",
       "amount      0\n",
       "savings     0\n",
       "employed    0\n",
       "installp    0\n",
       "marital     0\n",
       "coapp       0\n",
       "resident    0\n",
       "property    0\n",
       "age         0\n",
       "other       0\n",
       "housing     0\n",
       "existcr     0\n",
       "job         0\n",
       "depends     0\n",
       "telephon    0\n",
       "foreign     0\n",
       "good_bad    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "0      False\n",
       "1      False\n",
       "2      False\n",
       "3      False\n",
       "4      False\n",
       "       ...  \n",
       "995    False\n",
       "996    False\n",
       "997    False\n",
       "998    False\n",
       "999    False\n",
       "Length: 1000, dtype: bool"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd \n",
    "credit_df = pd.read_csv('credit.csv')\n",
    "credit_df.info() \n",
    "credit_df.describe() \n",
    "credit_df.isnull().sum() \n",
    "credit_df.duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "a2c7bfa1-4bae-4f9b-97ab-936c81e8e40d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "purpose\n",
      "3    280\n",
      "0    234\n",
      "2    181\n",
      "1    103\n",
      "9     97\n",
      "6     50\n",
      "5     22\n",
      "4     12\n",
      "X     12\n",
      "8      9\n",
      "Name: count, dtype: int64\n",
      "good_bad\n",
      "good    700\n",
      "bad     300\n",
      "Name: count, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 通过value_counts()函数对类别型变量进行频数统计\n",
    "print(credit_df.purpose.value_counts())\n",
    "print(credit_df.good_bad.value_counts())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "3ee6b486-344e-4233-b412-926d1d19b89e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>purpose_0</th>\n",
       "      <th>purpose_1</th>\n",
       "      <th>purpose_2</th>\n",
       "      <th>purpose_3</th>\n",
       "      <th>purpose_4</th>\n",
       "      <th>purpose_5</th>\n",
       "      <th>purpose_6</th>\n",
       "      <th>purpose_8</th>\n",
       "      <th>purpose_9</th>\n",
       "      <th>purpose_X</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   purpose_0  purpose_1  purpose_2  purpose_3  purpose_4  purpose_5  \\\n",
       "0      False      False      False       True      False      False   \n",
       "1      False      False      False       True      False      False   \n",
       "2      False      False      False      False      False      False   \n",
       "3      False      False       True      False      False      False   \n",
       "4       True      False      False      False      False      False   \n",
       "\n",
       "   purpose_6  purpose_8  purpose_9  purpose_X  \n",
       "0      False      False      False      False  \n",
       "1      False      False      False      False  \n",
       "2       True      False      False      False  \n",
       "3      False      False      False      False  \n",
       "4      False      False      False      False  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 把类别型变量进行独热编码(1->N)\n",
    "checking = pd.get_dummies(credit_df.checking,prefix='checking')\n",
    "history = pd.get_dummies(credit_df.history,prefix='history')\n",
    "purpose = pd.get_dummies(credit_df.purpose,prefix='purpose')\n",
    "savings = pd.get_dummies(credit_df.savings,prefix='savings')\n",
    "employed = pd.get_dummies(credit_df.employed,prefix='employed')\n",
    "installp = pd.get_dummies(credit_df.installp,prefix='installp')\n",
    "marital = pd.get_dummies(credit_df.marital,prefix='marital')\n",
    "coapp = pd.get_dummies(credit_df.coapp,prefix='coapp')\n",
    "installp = pd.get_dummies(credit_df.installp,prefix='installp')\n",
    "resident = pd.get_dummies(credit_df.resident,prefix='resident')\n",
    "property = pd.get_dummies(credit_df.property,prefix='property')\n",
    "housing = pd.get_dummies(credit_df.housing,prefix='housing')\n",
    "existcr = pd.get_dummies(credit_df.existcr,prefix='existcr')\n",
    "job = pd.get_dummies(credit_df.job,prefix='job')\n",
    "depends = pd.get_dummies(credit_df.depends,prefix='depends')\n",
    "telephon = pd.get_dummies(credit_df.telephon,prefix='telephon')\n",
    "foreign = pd.get_dummies(credit_df.foreign,prefix='foreign')\n",
    "\n",
    "# 随便查看一个编码后的数据\n",
    "purpose.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "878d9595-723c-4def-8e90-7332a63e41e4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>amount</th>\n",
       "      <th>age</th>\n",
       "      <th>checking_1</th>\n",
       "      <th>checking_2</th>\n",
       "      <th>checking_3</th>\n",
       "      <th>checking_4</th>\n",
       "      <th>history_0</th>\n",
       "      <th>history_1</th>\n",
       "      <th>history_2</th>\n",
       "      <th>...</th>\n",
       "      <th>job_1</th>\n",
       "      <th>job_2</th>\n",
       "      <th>job_3</th>\n",
       "      <th>job_4</th>\n",
       "      <th>depends_1</th>\n",
       "      <th>depends_2</th>\n",
       "      <th>telephon_1</th>\n",
       "      <th>telephon_2</th>\n",
       "      <th>foreign_1</th>\n",
       "      <th>foreign_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>1169</td>\n",
       "      <td>67</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>48</td>\n",
       "      <td>5951</td>\n",
       "      <td>22</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12</td>\n",
       "      <td>2096</td>\n",
       "      <td>49</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>42</td>\n",
       "      <td>7882</td>\n",
       "      <td>45</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24</td>\n",
       "      <td>4870</td>\n",
       "      <td>53</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 72 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   duration  amount  age  checking_1  checking_2  checking_3  checking_4  \\\n",
       "0         6    1169   67        True       False       False       False   \n",
       "1        48    5951   22       False        True       False       False   \n",
       "2        12    2096   49       False       False       False        True   \n",
       "3        42    7882   45        True       False       False       False   \n",
       "4        24    4870   53        True       False       False       False   \n",
       "\n",
       "   history_0  history_1  history_2  ...  job_1  job_2  job_3  job_4  \\\n",
       "0      False      False      False  ...  False  False   True  False   \n",
       "1      False      False       True  ...  False  False   True  False   \n",
       "2      False      False      False  ...  False   True  False  False   \n",
       "3      False      False       True  ...  False  False   True  False   \n",
       "4      False      False      False  ...  False  False   True  False   \n",
       "\n",
       "   depends_1  depends_2  telephon_1  telephon_2  foreign_1  foreign_2  \n",
       "0       True      False       False        True       True      False  \n",
       "1       True      False        True       False       True      False  \n",
       "2      False       True        True       False       True      False  \n",
       "3      False       True        True       False       True      False  \n",
       "4      False       True        True       False       True      False  \n",
       "\n",
       "[5 rows x 72 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用concat()函数把转换后的数据对象全部合并在一起变成新的数据对象，命名为trainData_X\n",
    "trainData_X = pd.concat([credit_df.duration, credit_df.amount, credit_df.age, checking, history, purpose, savings, employed, installp, marital, coapp, installp, resident, property, housing, existcr, job, depends, telephon, foreign], axis=1)\n",
    "trainData_X.head()\n",
    "# 72 columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "34d5c886-fc7e-4ecf-866b-51e9edd17121",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "target\n",
       "0    700\n",
       "1    300\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 构造目标变量y的数据对象\n",
    "# 当前的目标变量good_bad的取值是'good'和'bad'，一般我们习惯把二分类的类别值编码为0和1，1一般表示类别比较少的那一类，他们往往是我们感兴趣的\n",
    "credit_df['target'] = 0\n",
    "credit_df.loc[(credit_df.good_bad == 'bad'), 'target'] = 1\n",
    "\n",
    "trainData_y = credit_df['target'] \n",
    "trainData_y.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "aa0e8400-75aa-4868-a7ca-4fe29468c6c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 拆分训练集和测试集\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(trainData_X, trainData_y, test_size=0.3, random_state=2048)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "6fd2d9af-717e-4400-b155-da199ad359cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(700, 72) (300, 72)\n"
     ]
    }
   ],
   "source": [
    "print(X_train.shape,X_test.shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "06d54922-8ab7-4d82-bba9-62eb1ae89f3b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>amount</th>\n",
       "      <th>age</th>\n",
       "      <th>checking_1</th>\n",
       "      <th>checking_2</th>\n",
       "      <th>checking_3</th>\n",
       "      <th>checking_4</th>\n",
       "      <th>history_0</th>\n",
       "      <th>history_1</th>\n",
       "      <th>history_2</th>\n",
       "      <th>...</th>\n",
       "      <th>job_1</th>\n",
       "      <th>job_2</th>\n",
       "      <th>job_3</th>\n",
       "      <th>job_4</th>\n",
       "      <th>depends_1</th>\n",
       "      <th>depends_2</th>\n",
       "      <th>telephon_1</th>\n",
       "      <th>telephon_2</th>\n",
       "      <th>foreign_1</th>\n",
       "      <th>foreign_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>12</td>\n",
       "      <td>951</td>\n",
       "      <td>27</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>250</th>\n",
       "      <td>6</td>\n",
       "      <td>666</td>\n",
       "      <td>39</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>420</th>\n",
       "      <td>15</td>\n",
       "      <td>3186</td>\n",
       "      <td>20</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202</th>\n",
       "      <td>27</td>\n",
       "      <td>5117</td>\n",
       "      <td>26</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>623</th>\n",
       "      <td>12</td>\n",
       "      <td>1858</td>\n",
       "      <td>22</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231</th>\n",
       "      <td>9</td>\n",
       "      <td>2507</td>\n",
       "      <td>51</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258</th>\n",
       "      <td>15</td>\n",
       "      <td>3812</td>\n",
       "      <td>23</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>782</th>\n",
       "      <td>12</td>\n",
       "      <td>1410</td>\n",
       "      <td>31</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>613</th>\n",
       "      <td>24</td>\n",
       "      <td>3632</td>\n",
       "      <td>22</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>409</th>\n",
       "      <td>12</td>\n",
       "      <td>939</td>\n",
       "      <td>28</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>300 rows × 72 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     duration  amount  age  checking_1  checking_2  checking_3  checking_4  \\\n",
       "197        12     951   27       False        True       False       False   \n",
       "250         6     666   39        True       False       False       False   \n",
       "420        15    3186   20       False       False       False        True   \n",
       "202        27    5117   26       False       False       False        True   \n",
       "623        12    1858   22        True       False       False       False   \n",
       "..        ...     ...  ...         ...         ...         ...         ...   \n",
       "231         9    2507   51       False       False       False        True   \n",
       "258        15    3812   23       False       False       False        True   \n",
       "782        12    1410   31       False        True       False       False   \n",
       "613        24    3632   22        True       False       False       False   \n",
       "409        12     939   28       False       False        True       False   \n",
       "\n",
       "     history_0  history_1  history_2  ...  job_1  job_2  job_3  job_4  \\\n",
       "197      False      False       True  ...  False  False   True  False   \n",
       "250      False      False      False  ...  False   True  False  False   \n",
       "420      False      False       True  ...  False  False   True  False   \n",
       "202      False      False      False  ...  False  False   True  False   \n",
       "623      False      False       True  ...  False  False   True  False   \n",
       "..         ...        ...        ...  ...    ...    ...    ...    ...   \n",
       "231      False      False       True  ...  False   True  False  False   \n",
       "258      False      False       True  ...  False  False   True  False   \n",
       "782       True      False      False  ...  False   True  False  False   \n",
       "613      False       True      False  ...  False  False   True  False   \n",
       "409      False      False      False  ...  False  False   True  False   \n",
       "\n",
       "     depends_1  depends_2  telephon_1  telephon_2  foreign_1  foreign_2  \n",
       "197       True      False        True       False       True      False  \n",
       "250       True      False       False        True       True      False  \n",
       "420       True      False        True       False       True      False  \n",
       "202       True      False        True       False       True      False  \n",
       "623       True      False        True       False       True      False  \n",
       "..         ...        ...         ...         ...        ...        ...  \n",
       "231       True      False        True       False       True      False  \n",
       "258       True      False       False        True       True      False  \n",
       "782       True      False       False        True       True      False  \n",
       "613       True      False        True       False      False       True  \n",
       "409       True      False       False        True       True      False  \n",
       "\n",
       "[300 rows x 72 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "5b08c9bf-553d-4b62-a835-279d6aded636",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "target\n",
       "0    199\n",
       "1    101\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "542d7521-03f8-4989-822f-a3194e4f4709",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-2 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-2 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-2 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-2 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-2 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-2 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier(max_depth=10)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;DecisionTreeClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.tree.DecisionTreeClassifier.html\">?<span>Documentation for DecisionTreeClassifier</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier(max_depth=10)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "DecisionTreeClassifier(max_depth=10)"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "dtree = DecisionTreeClassifier(max_depth=10)\n",
    "dtree.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "83efdcef-c494-4b47-9db0-e04bc3f22659",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.10577557, 0.15807809, 0.06971197, 0.00724386, 0.        ,\n",
       "       0.        , 0.12837154, 0.02479451, 0.04281327, 0.011716  ,\n",
       "       0.00436754, 0.01235984, 0.        , 0.01640178, 0.00941954,\n",
       "       0.        , 0.        , 0.00651948, 0.00177266, 0.        ,\n",
       "       0.02925399, 0.        , 0.04946573, 0.02740458, 0.00705467,\n",
       "       0.        , 0.        , 0.02471968, 0.00178414, 0.00792297,\n",
       "       0.        , 0.02052866, 0.        , 0.        , 0.01727209,\n",
       "       0.00950757, 0.        , 0.        , 0.02017647, 0.        ,\n",
       "       0.01730297, 0.01044812, 0.01523301, 0.00713068, 0.        ,\n",
       "       0.00282963, 0.00930225, 0.00916801, 0.        , 0.        ,\n",
       "       0.        , 0.02213906, 0.02097184, 0.03284741, 0.        ,\n",
       "       0.        , 0.        , 0.        , 0.00578104, 0.        ,\n",
       "       0.        , 0.        , 0.        , 0.00633838, 0.        ,\n",
       "       0.00547168, 0.        , 0.        , 0.01426135, 0.        ,\n",
       "       0.00633838, 0.        ])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtree.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "34930dd2-c7a0-45ad-ad50-a2ce408e7737",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>amount</td>\n",
       "      <td>0.158078</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>checking_4</td>\n",
       "      <td>0.128372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>duration</td>\n",
       "      <td>0.105776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>age</td>\n",
       "      <td>0.069712</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>savings_1</td>\n",
       "      <td>0.049466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>history_1</td>\n",
       "      <td>0.042813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>property_3</td>\n",
       "      <td>0.032847</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>purpose_9</td>\n",
       "      <td>0.029254</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>savings_2</td>\n",
       "      <td>0.027405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>history_0</td>\n",
       "      <td>0.024795</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>employed_1</td>\n",
       "      <td>0.024720</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>property_1</td>\n",
       "      <td>0.022139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>property_2</td>\n",
       "      <td>0.020972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>employed_5</td>\n",
       "      <td>0.020529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>marital_3</td>\n",
       "      <td>0.020176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>coapp_1</td>\n",
       "      <td>0.017303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>installp_3</td>\n",
       "      <td>0.017272</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>purpose_1</td>\n",
       "      <td>0.016402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>coapp_3</td>\n",
       "      <td>0.015233</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>telephon_1</td>\n",
       "      <td>0.014261</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       feature  importance\n",
       "1       amount    0.158078\n",
       "6   checking_4    0.128372\n",
       "0     duration    0.105776\n",
       "2          age    0.069712\n",
       "22   savings_1    0.049466\n",
       "8    history_1    0.042813\n",
       "53  property_3    0.032847\n",
       "20   purpose_9    0.029254\n",
       "23   savings_2    0.027405\n",
       "7    history_0    0.024795\n",
       "27  employed_1    0.024720\n",
       "51  property_1    0.022139\n",
       "52  property_2    0.020972\n",
       "31  employed_5    0.020529\n",
       "38   marital_3    0.020176\n",
       "40     coapp_1    0.017303\n",
       "34  installp_3    0.017272\n",
       "13   purpose_1    0.016402\n",
       "42     coapp_3    0.015233\n",
       "68  telephon_1    0.014261"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看特征的重要性排序\n",
    "import pandas as pd\n",
    "features = pd.DataFrame()\n",
    "features['feature'] = X_train.columns\n",
    "features['importance'] = dtree.feature_importances_\n",
    "features = features.sort_values(by =['importance'], ascending=False)\n",
    "\n",
    "# 显示最重要的前10个特征变量\n",
    "features.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "ae27b8a7-236b-4bfd-9cc7-48d772cadc1e",
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'pydotplus'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[60], line 2\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mIPython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdisplay\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Image\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpydotplus\u001b[39;00m\n\u001b[0;32m      4\u001b[0m dot_data \u001b[38;5;241m=\u001b[39m tree\u001b[38;5;241m.\u001b[39mexport_graphviz(dtree, out_file\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \n\u001b[0;32m      5\u001b[0m                          feature_names\u001b[38;5;241m=\u001b[39mX_train\u001b[38;5;241m.\u001b[39mcolumns,  \n\u001b[0;32m      6\u001b[0m                          class_names\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgood\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbad\u001b[39m\u001b[38;5;124m'\u001b[39m],  \n\u001b[0;32m      7\u001b[0m                          filled\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, rounded\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,  \n\u001b[0;32m      8\u001b[0m                          special_characters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)  \n\u001b[0;32m      9\u001b[0m graph \u001b[38;5;241m=\u001b[39m pydotplus\u001b[38;5;241m.\u001b[39mgraph_from_dot_data(dot_data)  \n",
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pydotplus'"
     ]
    }
   ],
   "source": [
    "from IPython.display import Image\n",
    "import pydotplus\n",
    "\n",
    "dot_data = tree.export_graphviz(dtree, out_file=None, \n",
    "                         feature_names=X_train.columns,  \n",
    "                         class_names=['good','bad'],  \n",
    "                         filled=True, rounded=True,  \n",
    "                         special_characters=True)  \n",
    "graph = pydotplus.graph_from_dot_data(dot_data)  \n",
    "\n",
    "Image(graph.create_png()) \n",
    "graph.write_pdf(\"dtree.pdf\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "18bc65e7-0d2b-40ba-9b87-9c817f7cf4c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-3 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-3 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-3 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-3 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-3 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-3 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-3 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-3 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-3 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-3 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-3 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-3 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),\n",
       "             param_grid={&#x27;criterion&#x27;: [&#x27;gini&#x27;, &#x27;entropy&#x27;],\n",
       "                         &#x27;max_depth&#x27;: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]},\n",
       "             scoring=&#x27;accuracy&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;GridSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),\n",
       "             param_grid={&#x27;criterion&#x27;: [&#x27;gini&#x27;, &#x27;entropy&#x27;],\n",
       "                         &#x27;max_depth&#x27;: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]},\n",
       "             scoring=&#x27;accuracy&#x27;)</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">estimator: DecisionTreeClassifier</label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier()</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;DecisionTreeClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.tree.DecisionTreeClassifier.html\">?<span>Documentation for DecisionTreeClassifier</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier()</pre></div> </div></div></div></div></div></div></div></div></div>"
      ],
      "text/plain": [
       "GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),\n",
       "             param_grid={'criterion': ['gini', 'entropy'],\n",
       "                         'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]},\n",
       "             scoring='accuracy')"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DecisionTreeClassifier(max_depth=7)\n",
      "0.7200000000000001\n",
      "{'criterion': 'gini', 'max_depth': 7}\n"
     ]
    }
   ],
   "source": [
    "# grid search调参\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "parameters = {\n",
    "    'criterion':['gini','entropy'],\n",
    "    'max_depth':[1,2,3,4,5,6,7,8,9,10,11,12]\n",
    "}\n",
    "\n",
    "dtree = tree.DecisionTreeClassifier()\n",
    "dtree_search = GridSearchCV(dtree, parameters, scoring='accuracy', cv=5)\n",
    "dtree_search.fit(X_train, y_train)\n",
    "\n",
    "#查看最佳结果\n",
    "print(dtree_search.best_estimator_)\n",
    "print(dtree_search.best_score_)\n",
    "print(dtree_search.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90e5ca08-3731-4c0d-98e2-a125f4b264bc",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "abc6290f-b7ba-41ce-b6c3-39637eb246e5",
   "metadata": {},
   "source": [
    "## 作业"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "39b0e2ce-83af-4546-9a79-712a3666ee4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn import tree\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc\n",
    "\n",
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = \"all\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "0d8f9890-c4e0-432e-b294-8099d94838ac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 149 entries, 0 to 148\n",
      "Data columns (total 5 columns):\n",
      " #   Column       Non-Null Count  Dtype  \n",
      "---  ------       --------------  -----  \n",
      " 0   5.1          149 non-null    float64\n",
      " 1   3.5          149 non-null    float64\n",
      " 2   1.4          149 non-null    float64\n",
      " 3   0.2          149 non-null    float64\n",
      " 4   Iris-setosa  149 non-null    object \n",
      "dtypes: float64(4), object(1)\n",
      "memory usage: 5.9+ KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>5.1</th>\n",
       "      <th>3.5</th>\n",
       "      <th>1.4</th>\n",
       "      <th>0.2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>149.000000</td>\n",
       "      <td>149.000000</td>\n",
       "      <td>149.000000</td>\n",
       "      <td>149.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>5.848322</td>\n",
       "      <td>3.051007</td>\n",
       "      <td>3.774497</td>\n",
       "      <td>1.205369</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.828594</td>\n",
       "      <td>0.433499</td>\n",
       "      <td>1.759651</td>\n",
       "      <td>0.761292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>4.300000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.100000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>5.100000</td>\n",
       "      <td>2.800000</td>\n",
       "      <td>1.600000</td>\n",
       "      <td>0.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>5.800000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.400000</td>\n",
       "      <td>1.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.400000</td>\n",
       "      <td>3.300000</td>\n",
       "      <td>5.100000</td>\n",
       "      <td>1.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>7.900000</td>\n",
       "      <td>4.400000</td>\n",
       "      <td>6.900000</td>\n",
       "      <td>2.500000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              5.1         3.5         1.4         0.2\n",
       "count  149.000000  149.000000  149.000000  149.000000\n",
       "mean     5.848322    3.051007    3.774497    1.205369\n",
       "std      0.828594    0.433499    1.759651    0.761292\n",
       "min      4.300000    2.000000    1.000000    0.100000\n",
       "25%      5.100000    2.800000    1.600000    0.300000\n",
       "50%      5.800000    3.000000    4.400000    1.300000\n",
       "75%      6.400000    3.300000    5.100000    1.800000\n",
       "max      7.900000    4.400000    6.900000    2.500000"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "5.1            0\n",
       "3.5            0\n",
       "1.4            0\n",
       "0.2            0\n",
       "Iris-setosa    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "0      False\n",
       "1      False\n",
       "2      False\n",
       "3      False\n",
       "4      False\n",
       "       ...  \n",
       "144    False\n",
       "145    False\n",
       "146    False\n",
       "147    False\n",
       "148    False\n",
       "Length: 149, dtype: bool"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd \n",
    "df = pd.read_csv('irisdata.txt')\n",
    "df.info() \n",
    "df.describe() \n",
    "df.isnull().sum() \n",
    "df.duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "8fca337c-4b10-487e-bd94-7498fcbc842e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5.1  3.5  1.4  0.2  Iris-setosa    \n",
      "4.9  3.1  1.5  0.1  Iris-setosa        3\n",
      "5.8  2.7  5.1  1.9  Iris-virginica     2\n",
      "6.2  3.4  5.4  2.3  Iris-virginica     1\n",
      "6.3  2.3  4.4  1.3  Iris-versicolor    1\n",
      "     2.5  4.9  1.5  Iris-versicolor    1\n",
      "                                      ..\n",
      "5.5  2.4  3.7  1.0  Iris-versicolor    1\n",
      "          3.8  1.1  Iris-versicolor    1\n",
      "     2.5  4.0  1.3  Iris-versicolor    1\n",
      "     2.6  4.4  1.2  Iris-versicolor    1\n",
      "7.9  3.8  6.4  2.0  Iris-virginica     1\n",
      "Name: count, Length: 146, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 通过value_counts()函数对类别型变量进行频数统计\n",
    "print(df.value_counts())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "bce19544-2ac7-4a67-b8e4-6dffe1e9d181",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>5.1</th>\n",
       "      <th>3.5</th>\n",
       "      <th>1.4</th>\n",
       "      <th>0.2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.9</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   5.1  3.5  1.4  0.2\n",
       "0  4.9  3.0  1.4  0.2\n",
       "1  4.7  3.2  1.3  0.2\n",
       "2  4.6  3.1  1.5  0.2\n",
       "3  5.0  3.6  1.4  0.2\n",
       "4  5.4  3.9  1.7  0.4"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 利用concat()函数把转换后的数据对象全部合并在一起变成新的数据对象，命名为trainData_X\n",
    "trainData_X = df.iloc[0:,:4]\n",
    "trainData_X.head()\n",
    "# 72 columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "686d634c-63b4-45b6-858e-3891be833296",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "target\n",
       "2    50\n",
       "0    50\n",
       "1    49\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 构造目标变量y的数据对象\n",
    "# 当前的目标变量good_bad的取值是'good'和'bad'，一般我们习惯把二分类的类别值编码为0和1，1一般表示类别比较少的那一类，他们往往是我们感兴趣的\n",
    "df['target'] = 0\n",
    "df.loc[(df.iloc[0:150,4] == 'Iris-setosa'), 'target'] = 1\n",
    "df.loc[(df.iloc[0:150,4] == 'Iris-versicolor'), 'target'] = 2\n",
    "trainData_y = df['target'] \n",
    "trainData_y.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "53efcbef-0490-4067-b63d-163ee9ab1e09",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 拆分训练集和测试集\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(trainData_X, trainData_y, test_size=0.3, random_state=1024)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "ce88653b-cf91-433e-b3c7-99d56eed0082",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(104, 4) (45, 4)\n"
     ]
    }
   ],
   "source": [
    "print(X_train.shape,X_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "5b95ddae-c5a4-4ca8-8d35-cfa17119e8e8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "target\n",
       "2    18\n",
       "1    14\n",
       "0    13\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "1b6b0716-2372-46d8-bf04-74ac7d9e4a41",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-10 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-10 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-10 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-10 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-10 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-10 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-10 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-10 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-10 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-10 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-10 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-10 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-10 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-10 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-10 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-10 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-10 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-10 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-10 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-10\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier(max_depth=10)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" checked><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;DecisionTreeClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.tree.DecisionTreeClassifier.html\">?<span>Documentation for DecisionTreeClassifier</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier(max_depth=10)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "DecisionTreeClassifier(max_depth=10)"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "dtree = DecisionTreeClassifier(max_depth=10)\n",
    "dtree.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "fd900bd4-fa1a-4118-b088-4051e199491a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.02167269, 0.02167269, 0.89928575, 0.05736888])"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dtree.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "580eed69-3297-4302-b682-bde17a2c23c2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.4</td>\n",
       "      <td>0.899286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.2</td>\n",
       "      <td>0.057369</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>0.021673</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3.5</td>\n",
       "      <td>0.021673</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  feature  importance\n",
       "2     1.4    0.899286\n",
       "3     0.2    0.057369\n",
       "0     5.1    0.021673\n",
       "1     3.5    0.021673"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看特征的重要性排序\n",
    "import pandas as pd\n",
    "features = pd.DataFrame()\n",
    "features['feature'] = X_train.columns\n",
    "features['importance'] = dtree.feature_importances_\n",
    "features = features.sort_values(by =['importance'], ascending=False)\n",
    "\n",
    "# 显示最重要的前10个特征变量\n",
    "features.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "0a251803-ee21-4eca-821a-5228f2d80892",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.93      1.00      0.96        13\n",
      "           1       1.00      1.00      1.00        14\n",
      "           2       1.00      0.94      0.97        18\n",
      "\n",
      "    accuracy                           0.98        45\n",
      "   macro avg       0.98      0.98      0.98        45\n",
      "weighted avg       0.98      0.98      0.98        45\n",
      "\n",
      "[[13  0  0]\n",
      " [ 0 14  0]\n",
      " [ 1  0 17]]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc\n",
    "\n",
    "# 利用模型对测试集进行预测，输出target预测标签值和概率\n",
    "y_test_pred = dtree.predict(X_test)\n",
    "y_test_prob = dtree.predict_proba(X_test)\n",
    "\n",
    "# 分类评估汇总报告classification_report\n",
    "print(classification_report(y_test,y_test_pred))\n",
    "\n",
    "# 误分类矩阵 confusion_matrix\n",
    "print(confusion_matrix(y_test,y_test_pred))\n",
    "                       \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60374a8c-0962-41c4-b2d0-ae740fc347e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc\n",
    "\n",
    "# 利用模型对测试集进行预测，输出target预测标签值和概率\n",
    "y_test_pred = dtree.predict(X_test)\n",
    "y_test_prob = dtree.predict_proba(X_test)\n",
    "\n",
    "# 分类评估汇总报告classification_report\n",
    "print(classification_report(y_test,y_test_pred))\n",
    "\n",
    "# 误分类矩阵 confusion_matrix\n",
    "print(confusion_matrix(y_test,y_test_pred))\n",
    "                       \n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d318864-2f0b-40c0-ac08-77caf6c6e925",
   "metadata": {},
   "source": [
    "## 作业"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "37db02ca-3de8-4168-bcd5-1bc7a06d752c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn import tree\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc\n",
    "\n",
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = \"all\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "3814ddc3-edb7-49a5-861f-a46de86d7bae",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "      <th>15</th>\n",
       "      <th>16</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47</td>\n",
       "      <td>100</td>\n",
       "      <td>27</td>\n",
       "      <td>81</td>\n",
       "      <td>57</td>\n",
       "      <td>37</td>\n",
       "      <td>26</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>56</td>\n",
       "      <td>53</td>\n",
       "      <td>100</td>\n",
       "      <td>90</td>\n",
       "      <td>40</td>\n",
       "      <td>98</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>89</td>\n",
       "      <td>27</td>\n",
       "      <td>100</td>\n",
       "      <td>42</td>\n",
       "      <td>75</td>\n",
       "      <td>29</td>\n",
       "      <td>45</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>37</td>\n",
       "      <td>0</td>\n",
       "      <td>69</td>\n",
       "      <td>2</td>\n",
       "      <td>100</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>57</td>\n",
       "      <td>31</td>\n",
       "      <td>68</td>\n",
       "      <td>72</td>\n",
       "      <td>90</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>76</td>\n",
       "      <td>75</td>\n",
       "      <td>50</td>\n",
       "      <td>51</td>\n",
       "      <td>28</td>\n",
       "      <td>25</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>100</td>\n",
       "      <td>7</td>\n",
       "      <td>92</td>\n",
       "      <td>5</td>\n",
       "      <td>68</td>\n",
       "      <td>19</td>\n",
       "      <td>45</td>\n",
       "      <td>86</td>\n",
       "      <td>34</td>\n",
       "      <td>100</td>\n",
       "      <td>45</td>\n",
       "      <td>74</td>\n",
       "      <td>23</td>\n",
       "      <td>67</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>67</td>\n",
       "      <td>49</td>\n",
       "      <td>83</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>81</td>\n",
       "      <td>80</td>\n",
       "      <td>60</td>\n",
       "      <td>60</td>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "      <td>33</td>\n",
       "      <td>20</td>\n",
       "      <td>47</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7489</th>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>9</td>\n",
       "      <td>59</td>\n",
       "      <td>56</td>\n",
       "      <td>34</td>\n",
       "      <td>41</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>30</td>\n",
       "      <td>3</td>\n",
       "      <td>67</td>\n",
       "      <td>42</td>\n",
       "      <td>96</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7490</th>\n",
       "      <td>49</td>\n",
       "      <td>100</td>\n",
       "      <td>0</td>\n",
       "      <td>70</td>\n",
       "      <td>24</td>\n",
       "      <td>56</td>\n",
       "      <td>100</td>\n",
       "      <td>65</td>\n",
       "      <td>86</td>\n",
       "      <td>85</td>\n",
       "      <td>44</td>\n",
       "      <td>77</td>\n",
       "      <td>21</td>\n",
       "      <td>38</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7491</th>\n",
       "      <td>100</td>\n",
       "      <td>98</td>\n",
       "      <td>60</td>\n",
       "      <td>100</td>\n",
       "      <td>24</td>\n",
       "      <td>87</td>\n",
       "      <td>3</td>\n",
       "      <td>58</td>\n",
       "      <td>35</td>\n",
       "      <td>51</td>\n",
       "      <td>58</td>\n",
       "      <td>26</td>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7492</th>\n",
       "      <td>59</td>\n",
       "      <td>65</td>\n",
       "      <td>91</td>\n",
       "      <td>100</td>\n",
       "      <td>84</td>\n",
       "      <td>96</td>\n",
       "      <td>72</td>\n",
       "      <td>50</td>\n",
       "      <td>51</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>45</td>\n",
       "      <td>1</td>\n",
       "      <td>100</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7493</th>\n",
       "      <td>0</td>\n",
       "      <td>78</td>\n",
       "      <td>29</td>\n",
       "      <td>100</td>\n",
       "      <td>94</td>\n",
       "      <td>86</td>\n",
       "      <td>70</td>\n",
       "      <td>48</td>\n",
       "      <td>42</td>\n",
       "      <td>11</td>\n",
       "      <td>32</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>36</td>\n",
       "      <td>100</td>\n",
       "      <td>40</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7494 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       0    1   2    3    4    5    6    7   8   9    10  11   12  13   14  \\\n",
       "0      47  100  27   81   57   37   26    0   0  23   56  53  100  90   40   \n",
       "1       0   89  27  100   42   75   29   45  15  15   37   0   69   2  100   \n",
       "2       0   57  31   68   72   90  100  100  76  75   50  51   28  25   16   \n",
       "3       0  100   7   92    5   68   19   45  86  34  100  45   74  23   67   \n",
       "4       0   67  49   83  100  100   81   80  60  60   40  40   33  20   47   \n",
       "...   ...  ...  ..  ...  ...  ...  ...  ...  ..  ..  ...  ..  ...  ..  ...   \n",
       "7489    0   82   9   59   56   34   41    0  10  30    3  67   42  96  100   \n",
       "7490   49  100   0   70   24   56  100   65  86  85   44  77   21  38    6   \n",
       "7491  100   98  60  100   24   87    3   58  35  51   58  26   36   0    0   \n",
       "7492   59   65  91  100   84   96   72   50  51   8    0   0   45   1  100   \n",
       "7493    0   78  29  100   94   86   70   48  42  11   32   0   25  36  100   \n",
       "\n",
       "       15  16  \n",
       "0      98   8  \n",
       "1       6   2  \n",
       "2       0   1  \n",
       "3       0   4  \n",
       "4       0   1  \n",
       "...   ...  ..  \n",
       "7489  100   5  \n",
       "7490    0   4  \n",
       "7491    5   5  \n",
       "7492    0   1  \n",
       "7493   40   7  \n",
       "\n",
       "[7494 rows x 17 columns]"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('../homework1/pendigits.txt', header = None)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "48a42648-8606-43ff-90b0-cb95d3c5dd5a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 7494 entries, 0 to 7493\n",
      "Data columns (total 17 columns):\n",
      " #   Column  Non-Null Count  Dtype\n",
      "---  ------  --------------  -----\n",
      " 0   0       7494 non-null   int64\n",
      " 1   1       7494 non-null   int64\n",
      " 2   2       7494 non-null   int64\n",
      " 3   3       7494 non-null   int64\n",
      " 4   4       7494 non-null   int64\n",
      " 5   5       7494 non-null   int64\n",
      " 6   6       7494 non-null   int64\n",
      " 7   7       7494 non-null   int64\n",
      " 8   8       7494 non-null   int64\n",
      " 9   9       7494 non-null   int64\n",
      " 10  10      7494 non-null   int64\n",
      " 11  11      7494 non-null   int64\n",
      " 12  12      7494 non-null   int64\n",
      " 13  13      7494 non-null   int64\n",
      " 14  14      7494 non-null   int64\n",
      " 15  15      7494 non-null   int64\n",
      " 16  16      7494 non-null   int64\n",
      "dtypes: int64(17)\n",
      "memory usage: 995.4 KB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "      <th>15</th>\n",
       "      <th>16</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "      <td>7494.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>37.384307</td>\n",
       "      <td>84.679343</td>\n",
       "      <td>40.005604</td>\n",
       "      <td>82.889512</td>\n",
       "      <td>50.878303</td>\n",
       "      <td>65.044436</td>\n",
       "      <td>51.471844</td>\n",
       "      <td>44.599680</td>\n",
       "      <td>57.129971</td>\n",
       "      <td>34.069122</td>\n",
       "      <td>61.417401</td>\n",
       "      <td>35.782092</td>\n",
       "      <td>54.699760</td>\n",
       "      <td>35.800774</td>\n",
       "      <td>46.813718</td>\n",
       "      <td>28.565386</td>\n",
       "      <td>4.430878</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>33.322024</td>\n",
       "      <td>16.848420</td>\n",
       "      <td>26.256025</td>\n",
       "      <td>19.638582</td>\n",
       "      <td>34.927201</td>\n",
       "      <td>27.377341</td>\n",
       "      <td>30.680075</td>\n",
       "      <td>30.659478</td>\n",
       "      <td>33.680340</td>\n",
       "      <td>27.459989</td>\n",
       "      <td>37.130762</td>\n",
       "      <td>27.495836</td>\n",
       "      <td>22.599781</td>\n",
       "      <td>33.223611</td>\n",
       "      <td>41.531794</td>\n",
       "      <td>35.811094</td>\n",
       "      <td>2.876981</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>5.000000</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>70.000000</td>\n",
       "      <td>17.000000</td>\n",
       "      <td>48.000000</td>\n",
       "      <td>28.000000</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>31.000000</td>\n",
       "      <td>89.000000</td>\n",
       "      <td>39.000000</td>\n",
       "      <td>89.000000</td>\n",
       "      <td>56.000000</td>\n",
       "      <td>71.000000</td>\n",
       "      <td>54.000000</td>\n",
       "      <td>42.000000</td>\n",
       "      <td>60.000000</td>\n",
       "      <td>33.000000</td>\n",
       "      <td>74.000000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>53.000000</td>\n",
       "      <td>28.000000</td>\n",
       "      <td>39.000000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>4.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>61.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>58.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>81.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>75.000000</td>\n",
       "      <td>65.000000</td>\n",
       "      <td>88.000000</td>\n",
       "      <td>55.000000</td>\n",
       "      <td>98.000000</td>\n",
       "      <td>57.000000</td>\n",
       "      <td>69.000000</td>\n",
       "      <td>48.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>7.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>9.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                0            1            2            3            4   \\\n",
       "count  7494.000000  7494.000000  7494.000000  7494.000000  7494.000000   \n",
       "mean     37.384307    84.679343    40.005604    82.889512    50.878303   \n",
       "std      33.322024    16.848420    26.256025    19.638582    34.927201   \n",
       "min       0.000000     0.000000     0.000000     0.000000     0.000000   \n",
       "25%       5.000000    76.000000    20.000000    70.000000    17.000000   \n",
       "50%      31.000000    89.000000    39.000000    89.000000    56.000000   \n",
       "75%      61.000000   100.000000    58.000000   100.000000    81.000000   \n",
       "max     100.000000   100.000000   100.000000   100.000000   100.000000   \n",
       "\n",
       "                5            6            7            8            9   \\\n",
       "count  7494.000000  7494.000000  7494.000000  7494.000000  7494.000000   \n",
       "mean     65.044436    51.471844    44.599680    57.129971    34.069122   \n",
       "std      27.377341    30.680075    30.659478    33.680340    27.459989   \n",
       "min       0.000000     0.000000     0.000000     0.000000     0.000000   \n",
       "25%      48.000000    28.000000    22.000000    30.000000     7.000000   \n",
       "50%      71.000000    54.000000    42.000000    60.000000    33.000000   \n",
       "75%      86.000000    75.000000    65.000000    88.000000    55.000000   \n",
       "max     100.000000   100.000000   100.000000   100.000000   100.000000   \n",
       "\n",
       "                10           11           12           13           14  \\\n",
       "count  7494.000000  7494.000000  7494.000000  7494.000000  7494.000000   \n",
       "mean     61.417401    35.782092    54.699760    35.800774    46.813718   \n",
       "std      37.130762    27.495836    22.599781    33.223611    41.531794   \n",
       "min       0.000000     0.000000     0.000000     0.000000     0.000000   \n",
       "25%      25.000000    12.000000    41.000000     7.000000     0.000000   \n",
       "50%      74.000000    32.000000    53.000000    28.000000    39.000000   \n",
       "75%      98.000000    57.000000    69.000000    48.000000   100.000000   \n",
       "max     100.000000   100.000000   100.000000   100.000000   100.000000   \n",
       "\n",
       "                15           16  \n",
       "count  7494.000000  7494.000000  \n",
       "mean     28.565386     4.430878  \n",
       "std      35.811094     2.876981  \n",
       "min       0.000000     0.000000  \n",
       "25%       0.000000     2.000000  \n",
       "50%       8.000000     4.000000  \n",
       "75%      51.000000     7.000000  \n",
       "max     100.000000     9.000000  "
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "0     0\n",
       "1     0\n",
       "2     0\n",
       "3     0\n",
       "4     0\n",
       "5     0\n",
       "6     0\n",
       "7     0\n",
       "8     0\n",
       "9     0\n",
       "10    0\n",
       "11    0\n",
       "12    0\n",
       "13    0\n",
       "14    0\n",
       "15    0\n",
       "16    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "0       False\n",
       "1       False\n",
       "2       False\n",
       "3       False\n",
       "4       False\n",
       "        ...  \n",
       "7489    False\n",
       "7490    False\n",
       "7491    False\n",
       "7492    False\n",
       "7493    False\n",
       "Length: 7494, dtype: bool"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "df.info() \n",
    "df.describe() \n",
    "df.isnull().sum() \n",
    "df.duplicated()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "f619e602-b1d1-456d-b668-a92f8baa65d0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    1    2    3    4   5   6    7    8    9    10   11   12  13  14   15   16\n",
      "0    0    18   27   48  49  76   73   100  100  94   73   82  40  82   9    1     1\n",
      "49   93   0    55   28  74  63   100  62   50   59   3    24  1   100  0    1     1\n",
      "     100  8    93   0   60  45   82   61   57   100  28   73  0   8    2    9     1\n",
      "          5    87   15  59  92   53   100  84   69   57   35  28  0    0    4     1\n",
      "          0    70   24  56  100  65   86   85   44   77   21  38  6    0    4     1\n",
      "                                                                                 ..\n",
      "15   72   0    39   57  42  43   0    16   15   11   62   36  93  100  100  5     1\n",
      "     71   38   100  73  89  59   56   30   28   0    0    49  5   100  17   2     1\n",
      "     70   20   32   54  0   96   24   100  66   68   100  16  92  0    52   0     1\n",
      "     67   44   100  51  91  44   44   29   0    0    1    50  4   100  2    1     1\n",
      "100  100  100  96   79  80  50   64   36   48   14   31   0   16  7    0    1     1\n",
      "Name: count, Length: 7494, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(df.value_counts())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 205,
   "id": "f2f474e0-b9c3-41b9-8a89-5af6084ed2b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "      <th>15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>47</td>\n",
       "      <td>100</td>\n",
       "      <td>27</td>\n",
       "      <td>81</td>\n",
       "      <td>57</td>\n",
       "      <td>37</td>\n",
       "      <td>26</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>56</td>\n",
       "      <td>53</td>\n",
       "      <td>100</td>\n",
       "      <td>90</td>\n",
       "      <td>40</td>\n",
       "      <td>98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>89</td>\n",
       "      <td>27</td>\n",
       "      <td>100</td>\n",
       "      <td>42</td>\n",
       "      <td>75</td>\n",
       "      <td>29</td>\n",
       "      <td>45</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>37</td>\n",
       "      <td>0</td>\n",
       "      <td>69</td>\n",
       "      <td>2</td>\n",
       "      <td>100</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>57</td>\n",
       "      <td>31</td>\n",
       "      <td>68</td>\n",
       "      <td>72</td>\n",
       "      <td>90</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>76</td>\n",
       "      <td>75</td>\n",
       "      <td>50</td>\n",
       "      <td>51</td>\n",
       "      <td>28</td>\n",
       "      <td>25</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>100</td>\n",
       "      <td>7</td>\n",
       "      <td>92</td>\n",
       "      <td>5</td>\n",
       "      <td>68</td>\n",
       "      <td>19</td>\n",
       "      <td>45</td>\n",
       "      <td>86</td>\n",
       "      <td>34</td>\n",
       "      <td>100</td>\n",
       "      <td>45</td>\n",
       "      <td>74</td>\n",
       "      <td>23</td>\n",
       "      <td>67</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>67</td>\n",
       "      <td>49</td>\n",
       "      <td>83</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>81</td>\n",
       "      <td>80</td>\n",
       "      <td>60</td>\n",
       "      <td>60</td>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "      <td>33</td>\n",
       "      <td>20</td>\n",
       "      <td>47</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0    1   2    3    4    5    6    7   8   9   10  11   12  13   14  15\n",
       "0  47  100  27   81   57   37   26    0   0  23   56  53  100  90   40  98\n",
       "1   0   89  27  100   42   75   29   45  15  15   37   0   69   2  100   6\n",
       "2   0   57  31   68   72   90  100  100  76  75   50  51   28  25   16   0\n",
       "3   0  100   7   92    5   68   19   45  86  34  100  45   74  23   67   0\n",
       "4   0   67  49   83  100  100   81   80  60  60   40  40   33  20   47   0"
      ]
     },
     "execution_count": 205,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trainData_X = df.iloc[ :, :16]\n",
    "trainData_X.head()\n",
    "# 构造目标变量y的数据对象\n",
    "trainData_y=df.iloc[:,16]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "id": "4c707783-00bc-4dcb-8cd2-35ee81981e0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 拆分训练集和测试集\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(trainData_X, trainData_y, test_size=0.3, random_state=1024)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "id": "7bfcadf1-afab-4b0b-bc17-b860c887401d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5245, 16) (2249, 16)\n"
     ]
    }
   ],
   "source": [
    "print(X_train.shape,X_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "id": "db9d8b52-0480-4ca0-a009-5c8a6daf8d0f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "16\n",
       "0    245\n",
       "1    244\n",
       "2    244\n",
       "4    232\n",
       "7    228\n",
       "8    226\n",
       "6    216\n",
       "5    212\n",
       "9    211\n",
       "3    191\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 201,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "id": "62c73df3-af98-4fca-8275-455b85293c2c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-24 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-24 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-24 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-24 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-24 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-24 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-24 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-24 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-24 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-24 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-24 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-24 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-24 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-24 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-24 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-24 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-24 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-24 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-24 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-24\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier(max_depth=10)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-26\" type=\"checkbox\" checked><label for=\"sk-estimator-id-26\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;DecisionTreeClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.tree.DecisionTreeClassifier.html\">?<span>Documentation for DecisionTreeClassifier</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier(max_depth=10)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "DecisionTreeClassifier(max_depth=10)"
      ]
     },
     "execution_count": 202,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "array([0.02314707, 0.01558122, 0.01092238, 0.08167414, 0.08728863,\n",
       "       0.06412963, 0.05831401, 0.04134534, 0.08872278, 0.10301123,\n",
       "       0.08530431, 0.00534825, 0.01443985, 0.12696839, 0.11089798,\n",
       "       0.08290479])"
      ]
     },
     "execution_count": 202,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "dtree = DecisionTreeClassifier(max_depth=10)\n",
    "dtree.fit(X_train,y_train)\n",
    "dtree.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "id": "f21a1f62-8b7d-4ae4-a9ac-dc9509ba42e1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>0.126968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>0.110898</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>0.103011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>0.088723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0.087289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>0.085304</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>0.082905</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>0.081674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>0.064130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>0.058314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>0.041345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0.023147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.015581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>0.014440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0.010922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>0.005348</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   feature  importance\n",
       "13      13    0.126968\n",
       "14      14    0.110898\n",
       "9        9    0.103011\n",
       "8        8    0.088723\n",
       "4        4    0.087289\n",
       "10      10    0.085304\n",
       "15      15    0.082905\n",
       "3        3    0.081674\n",
       "5        5    0.064130\n",
       "6        6    0.058314\n",
       "7        7    0.041345\n",
       "0        0    0.023147\n",
       "1        1    0.015581\n",
       "12      12    0.014440\n",
       "2        2    0.010922\n",
       "11      11    0.005348"
      ]
     },
     "execution_count": 203,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看特征的重要性排序\n",
    "import pandas as pd\n",
    "features = pd.DataFrame()\n",
    "features['feature'] = X_train.columns\n",
    "features['importance'] = dtree.feature_importances_\n",
    "features = features.sort_values(by =['importance'], ascending=False)\n",
    "\n",
    "# 显示最重要的前10个特征变量\n",
    "features.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "id": "73950cd4-4552-45f7-b00a-0e2e9b8e5c45",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.98      0.98      0.98       245\n",
      "           1       0.91      0.92      0.91       244\n",
      "           2       0.95      0.93      0.94       244\n",
      "           3       0.91      0.97      0.94       191\n",
      "           4       0.99      0.97      0.98       232\n",
      "           5       0.98      0.96      0.97       212\n",
      "           6       0.98      0.98      0.98       216\n",
      "           7       0.96      0.95      0.95       228\n",
      "           8       0.96      0.96      0.96       226\n",
      "           9       0.96      0.97      0.96       211\n",
      "\n",
      "    accuracy                           0.96      2249\n",
      "   macro avg       0.96      0.96      0.96      2249\n",
      "weighted avg       0.96      0.96      0.96      2249\n",
      "\n",
      "[[240   0   0   0   0   0   0   0   4   1]\n",
      " [  0 224  11   6   0   0   1   1   0   1]\n",
      " [  0  10 227   2   0   1   0   4   0   0]\n",
      " [  0   1   1 186   0   1   0   2   0   0]\n",
      " [  0   0   0   2 226   0   0   0   0   4]\n",
      " [  0   5   0   3   0 203   0   0   0   1]\n",
      " [  1   2   0   0   0   1 211   1   0   0]\n",
      " [  0   2   0   3   0   0   1 216   5   1]\n",
      " [  4   0   0   0   0   1   2   2 217   0]\n",
      " [  0   2   0   2   2   1   0   0   0 204]]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import roc_curve\n",
    "from sklearn.metrics import auc\n",
    "\n",
    "# 利用模型对测试集进行预测，输出target预测标签值和概率\n",
    "y_test_pred = dtree.predict(X_test)\n",
    "y_test_prob = dtree.predict_proba(X_test)\n",
    "\n",
    "# 分类评估汇总报告classification_report\n",
    "print(classification_report(y_test,y_test_pred))\n",
    "\n",
    "# 误分类矩阵 confusion_matrix\n",
    "print(confusion_matrix(y_test,y_test_pred))\n",
    "                       \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9203692-9a67-428e-af6a-e1bd205787ed",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f09486b-a548-4f43-8f15-85fa850decd0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e7a73cc-3ac9-4ed3-9bed-1637c32cba5d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
